/******************************************************************************
Paper: The Impact of Welfare on Intergroup Relations
Author: Akshay Dixit

CPHS: This .do file starts with the merged data and prepares it for analysis by
creating variables for household-level characteristics.
******************************************************************************/

* Reset the session, then register the CPHS data folder as a global.
* NOTE: $identity is not defined in this file; it must be set before this
* .do file runs (otherwise the path below resolves relative to "/data/cphs").
clear all
global data_cmie "$identity/data/cphs"

* Load the merged household-level file that the rest of this script cleans.
use "$data_cmie/merged_data.dta", clear

******************************************************************************

*** Household characteristics: State, district, caste, religion ***

* The data must be uniquely identified by household ID; report the row count.
isid hh_id
count

* Family migration in the CPHS:
* recode each wave's "Y"/"N" string flag to "1"/"0" and convert it to numeric.
foreach shifted_var of varlist family_shifted_* {
	replace `shifted_var' = "1" if `shifted_var' == "Y"
	replace `shifted_var' = "0" if `shifted_var' == "N"
	destring `shifted_var', replace
}

* Number of waves in which the household reported shifting
* (missing when every wave-level flag is missing).
egen family_shifted_total = rowtotal(family_shifted_*), missing
tabulate family_shifted_total

/*
	By default, household characteristics are stored in wave-specific variables.
	
	To facilitate analysis, collapse these wave-specific variables into single ones
		(e.g., for state, district, caste, religion). 
		Wave-specific analysis is not warranted given that hardly anyone 
		(<1% of respondents) reports migrating.
*/
	
* Use the May 2017 value of each household characteristic as the starting
* point for the collapsed (wave-agnostic) variable of the same stub name.
foreach stub in state district region_type caste caste_nonmissing caste_category caste_category_nonmissing religion {
	rename `stub'_may17 `stub'
}

* Fill gaps in the collapsed variables from the remaining wave-specific
* variables. Wave variables are visited in the order Stata expands the
* wildcard, and a value is copied only while the collapsed variable is
* still empty, so the first non-empty wave value wins.
foreach stub in state district region_type {
	foreach wave_var of varlist `stub'_* {
		replace `stub' = `wave_var' if `stub' == ""
	}
}

* Same gap-filling for the two "nonmissing" caste variables, with the
* wildcard restricted to names ending in 17, 18 or 19.
foreach stub in caste_nonmissing caste_category_nonmissing {
	foreach wave_var of varlist `stub'_*17 `stub'_*18 `stub'_*19 {
		replace `stub' = `wave_var' if `stub' == ""
	}
}

* Fill gaps in caste_category from the later waves (Jun 2017 - Apr 2019),
* in chronological order; a value is copied only while caste_category is
* still empty, so the earliest non-empty wave wins.
*
* Waves are enumerated by suffix rather than with a caste_category_* wildcard
* because that wildcard would also match the caste_category_nonmissing_*
* variables. Building the names from one suffix list also avoids typos in a
* long hand-written variable list: the previous list repeated
* caste_category_jan18 and never used caste_category_jan19.
local later_waves jun17 jul17 aug17 sep17 oct17 nov17 dec17 ///
	jan18 feb18 mar18 apr18 may18 jun18 jul18 aug18 sep18 oct18 nov18 dec18 ///
	jan19 feb19 mar19 apr19
foreach wave of local later_waves {
	replace caste_category = caste_category_`wave' if caste_category == ""
}

* Fill gaps in caste from the later waves (Jun 2017 - Apr 2019), in
* chronological order; a value is copied only while caste is still empty.
local wave_suffixes jun17 jul17 aug17 sep17 oct17 nov17 dec17 ///
	jan18 feb18 mar18 apr18 may18 jun18 jul18 aug18 sep18 oct18 nov18 dec18 ///
	jan19 feb19 mar19 apr19
foreach sfx of local wave_suffixes {
	replace caste = caste_`sfx' if caste == ""
}

* Fill gaps in religion from the wave-specific variables whose names end in
* 17, 18 or 19; a value is copied only while religion is still empty.
foreach wave_var of varlist religion_*17 religion_*18 religion_*19 {
	replace religion = `wave_var' if religion == ""
}

* The wave-specific copies of these characteristics are no longer needed.
drop state_* district_* region_type_* religion_*

	/*
		Assuming that region type doesn't change.
		Again, this is a reasonable assumption, since 99% households never moved 
		in the period of analysis.
	*/

* State: indicator for Telangana, left missing when state is empty
generate telangana = (state == "Telangana") if state != ""
label variable telangana "Telangana"
	
* District ID: numeric, value-labelled version of the district string
encode district, generate(district_id)
codebook district_id

* Caste: one 0/1 indicator per caste category. The variable name is the
* category label in lower case with spaces replaced by underscores
* (upper_caste, obc, sc, st, intermediate_caste).
* NOTE(review): these indicators are 0, not missing, when
* caste_category_nonmissing is empty (unlike telangana above) - confirm
* that this is intended.
foreach grp in "Upper Caste" "OBC" "SC" "ST" "Intermediate Caste" {
	local dummy = lower(subinstr("`grp'", " ", "_", .))
	generate `dummy' = (caste_category_nonmissing == "`grp'")
}

* Religion: one 0/1 indicator per religion, named after the religion in
* lower case. As with caste, an empty religion yields 0 rather than missing.
foreach faith in Hindu Muslim Christian Sikh Buddhist Jain {
	local dummy = lower("`faith'")
	generate `dummy' = (religion == "`faith'")
}

* Copy of religion in which the two non-informative responses are blanked.
generate religion_nonmissing = religion
replace religion_nonmissing = "" if inlist(religion_nonmissing, "Not Applicable", "Religion not stated")

******************************************************************************

*** Farmers and laborers ***

	/*
		The CMIE says that agricultural laborers are likely landless, whereas
		farmers are likely landowning. 
		
		To implement this definition at the household level, the following code 
		classifies a household as:
			- "Farmer", if the household has at least one person whose occupation is 
			  reported as farmer.
			- "Laborer" if the household has at least one person whose occupation is
			  reported as laborer, AND,
			  zero persons whose occupation is reported as farmer.
	*/

* Wave-level indicators (May 2017 - Apr 2018): has_num_<occ>_<wave> is 1 if
* the household has at least one member with that occupation in the wave,
* 0 if it has none, and missing if the count itself is missing.
* The "if !missing()" qualifier matters: Stata treats missing values as
* larger than any number, so (x > 0) alone would evaluate to 1 for missing x.
local occ_waves may17 jun17 jul17 aug17 sep17 oct17 nov17 dec17 jan18 feb18 mar18 apr18
foreach occ in farmer laborer {
	foreach wave of local occ_waves {
		g has_num_`occ'_`wave' = (num_`occ'_`wave' > 0) if !missing(num_`occ'_`wave')
	}
}

* Number of waves in which the household had at least one farmer / laborer
* (missing when every wave-level indicator is missing).
egen sum_farmer = rowtotal(has_num_farmer*), missing
egen sum_laborer = rowtotal(has_num_laborer*), missing

* Laborer household: at least one wave with a laborer AND no wave with a farmer.
* Left missing whenever EITHER sum is missing, so the classification no longer
* depends on sum_laborer and sum_farmer having identical missingness patterns
* (if they do, the result is the same as blanking only when both are missing).
g laborer = (sum_laborer > 0 & sum_farmer == 0) if !missing(sum_laborer, sum_farmer)

* Farmer household: at least one wave with a farmer.
g farmer = (sum_farmer > 0) if !missing(sum_farmer)

******************************************************************************

*** Caste category: Club "intermediate" and "upper" into a single category ***

* Second version of the caste category in which "Intermediate Caste" and
* "Upper Caste" are merged into "Forward Caste"; other values are unchanged.
generate caste_category_nonmissing2 = caste_category_nonmissing
replace caste_category_nonmissing2 = "Forward Caste" if inlist(caste_category_nonmissing2, "Intermediate Caste", "Upper Caste")

******************************************************************************

* Write the cleaned household-level file (overwriting any previous version)
* and empty the data in memory.
save "$data_cmie/merged_data_clean.dta", replace

clear

